Fun example for learning dplyr and tidyr not to be used for interpretation or analysis. Customized from here: https://rpubs.com/cholland9/885458

County population downloaded from here https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-total.html#par_textimage

Example data is for Cali and Indiana

co-est2021-pop-06.xlsx - cali co-est2021-pop-18 (1) - Indy

library(tidyverse)
Warning: package ‘tidyverse’ was built under R version 4.0.4Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ──────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.3.5      ✔ purrr   0.3.4 
✔ tibble  3.1.4      ✔ dplyr   1.0.10
✔ tidyr   1.2.1      ✔ stringr 1.5.0 
✔ readr   2.0.1      ✔ forcats 0.5.1 
Warning: package ‘ggplot2’ was built under R version 4.0.5Warning: package ‘tibble’ was built under R version 4.0.5Warning: package ‘tidyr’ was built under R version 4.0.5Warning: package ‘readr’ was built under R version 4.0.5Warning: package ‘purrr’ was built under R version 4.0.3Warning: package ‘dplyr’ was built under R version 4.0.5Warning: package ‘stringr’ was built under R version 4.0.5Warning: package ‘forcats’ was built under R version 4.0.3── Conflicts ─────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
covid_raw <- read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv")
Rows: 3342 Columns: 1097── Column specification ───────────────────────────────────────────────────────────────────
Delimiter: ","
chr    (6): iso2, iso3, Admin2, Province_State, Country_Region, Combined_Key
dbl (1091): UID, code3, FIPS, Lat, Long_, 1/22/20, 1/23/20, 1/24/20, 1/25/20, 1/26/20, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nrow (covid_raw)
[1] 3342
ncol(covid_raw)
[1] 1097
covid <- select(covid_raw, state= "Province_State", county= "Admin2", contains("/") )

covid
nrow(covid)
[1] 3342
ncol(covid)
[1] 1088
covid <- filter(covid, state== "California")
nrow(covid)
[1] 60
ncol(covid)
[1] 1088
covid_long <- pivot_longer(covid, names_to = "date", values_to = "c_cases", cols = contains("/"))

covid_long
nrow(covid_long)
[1] 65160
library(lubridate)
Warning: package ‘lubridate’ was built under R version 4.0.5Loading required package: timechange
Warning: package ‘timechange’ was built under R version 4.0.5
Attaching package: ‘lubridate’

The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union
covid_long <- mutate(covid_long, date=mdy(date))
covid_long
covid_long <- filter(covid_long, date <= as.Date("2022-02-15"))
nrow(covid_long)
[1] 45360
covid_long <- group_by(covid_long, county)
covid_long <- arrange(covid_long, date)
covid_long <- mutate(covid_long, cases = c_cases - lag(c_cases, n=1L, default=0))
covid_last <- summarize (covid_long, total_cases=sum(c_cases))
nrow(covid_last)
[1] 60
ncol(covid_last)
[1] 2
covid_last
library(readxl)
mnpops <- read_excel("co-est2021-pop-06.xlsx")
mnpops
NA
covid_last <- inner_join(covid_last, mnpops)
Joining, by = "county"
covid_last
covid_last <- mutate(covid_last, per100k = total_cases / pop2020 * 100000)

covid_last
top10 <- slice_max(covid_last, per100k, n=10)
top10
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKRnVuIGV4YW1wbGUgZm9yIGxlYXJuaW5nIGRwbHlyIGFuZCB0aWR5ciBub3QgdG8gYmUgdXNlZCBmb3IgaW50ZXJwcmV0YXRpb24gb3IgYW5hbHlzaXMuIEN1c3RvbWl6ZWQgZnJvbSBoZXJlOiBodHRwczovL3JwdWJzLmNvbS9jaG9sbGFuZDkvODg1NDU4CgpDb3VudHkgcG9wdWxhdGlvbiBkb3dubG9hZGVkIGZyb20gaGVyZSBodHRwczovL3d3dy5jZW5zdXMuZ292L2RhdGEvdGFibGVzL3RpbWUtc2VyaWVzL2RlbW8vcG9wZXN0LzIwMjBzLWNvdW50aWVzLXRvdGFsLmh0bWwjcGFyX3RleHRpbWFnZQoKRXhhbXBsZSBkYXRhIGlzIGZvciBDYWxpIGFuZCBJbmRpYW5hCgpjby1lc3QyMDIxLXBvcC0wNi54bHN4IC0gY2FsaQpjby1lc3QyMDIxLXBvcC0xOCAoMSkgLSBJbmR5CgoKYGBge3J9CmxpYnJhcnkodGlkeXZlcnNlKQoKY292aWRfcmF3IDwtIHJlYWRfY3N2KCJodHRwczovL3Jhdy5naXRodWJ1c2VyY29udGVudC5jb20vQ1NTRUdJU2FuZERhdGEvQ09WSUQtMTkvbWFzdGVyL2Nzc2VfY292aWRfMTlfZGF0YS9jc3NlX2NvdmlkXzE5X3RpbWVfc2VyaWVzL3RpbWVfc2VyaWVzX2NvdmlkMTlfY29uZmlybWVkX1VTLmNzdiIpCgoKCmBgYAoKYGBge3J9Cm5yb3cgKGNvdmlkX3JhdykKYGBgCgoKYGBge3J9Cm5jb2woY292aWRfcmF3KQpgYGAKCmBgYHtyfQpjb3ZpZCA8LSBzZWxlY3QoY292aWRfcmF3LCBzdGF0ZT0gIlByb3ZpbmNlX1N0YXRlIiwgY291bnR5PSAiQWRtaW4yIiwgY29udGFpbnMoIi8iKSApCgpjb3ZpZApgYGAKCmBgYHtyfQpucm93KGNvdmlkKQpgYGAKCmBgYHtyfQpuY29sKGNvdmlkKQpgYGAKCmBgYHtyfQpjb3ZpZCA8LSBmaWx0ZXIoY292aWQsIHN0YXRlPT0gIkNhbGlmb3JuaWEiKQpgYGAKCmBgYHtyfQpucm93KGNvdmlkKQpgYGAKCmBgYHtyfQpuY29sKGNvdmlkKQpgYGAKCmBgYHtyfQpjb3ZpZF9sb25nIDwtIHBpdm90X2xvbmdlcihjb3ZpZCwgbmFtZXNfdG8gPSAiZGF0ZSIsIHZhbHVlc190byA9ICJjX2Nhc2VzIiwgY29scyA9IGNvbnRhaW5zKCIvIikpCgpjb3ZpZF9sb25nCmBgYAoKCmBgYHtyfQpucm93KGNvdmlkX2xvbmcpCmBgYAoKYGBge3J9CmxpYnJhcnkobHVicmlkYXRlKQpjb3ZpZF9sb25nIDwtIG11dGF0ZShjb3ZpZF9sb25nLCBkYXRlPW1keShkYXRlKSkKY292aWRfbG9uZwpgYGAKCmBgYHtyfQpjb3ZpZF9sb25nIDwtIGZpbHRlcihjb3ZpZF9sb25nLCBkYXRlIDw9IGFzLkRhdGUoIjIwMjItMDItMTUiKSkKYGBgCgpgYGB7cn0KbnJvdyhjb3ZpZF9sb25nKQpgYGAKCmBgYHtyfQpjb3ZpZF9sb25nIDwtIGdyb3VwX2J5KGNvdmlkX2xvbmcsIGNvdW50eSkKYGBgCgoKYGBge3J9CmNvdmlkX2xvbmcgPC0gYXJyYW5nZShjb3ZpZF9sb25nLCBkYXRlKQpgYGAKCgpgYGB7cn0KY292aWRfbG9uZyA8LSBtdXRhdGUoY292aWRfbG9uZywgY2FzZXMgPSBjX2Nhc2VzIC0gbGFnKGNfY2FzZXMsIG49MUwsIGRlZmF1bHQ9MCkpCmBgYAoKCmBgYHtyfQpjb3ZpZF9sYXN0IDwtIHN1bW1hcml6ZSAoY292aWRfbG9uZywgdG90YWxfY2FzZXM9c3VtKGNfY2FzZXMpKQpucm93KGNvdmlkX2xhc3QpCm5jb2woY292aWRfbGFzdCkKCmNvdmlkX2xhc3QKYGBgCgpgYGB7cn0KbGlicmFyeShyZWFkeGwpCm1ucG9wcyA8LSByZWFkX2V4Y2VsKCJjby1lc3QyMDIxLXBvcC0wNi54bHN4IikKbW5wb3BzCgpgYGAKCmBgYHtyfQpjb3ZpZF9sYXN0IDwtIGlubmVyX2pvaW4oY292aWRfbGFzdCwgbW5wb3BzKQpjb3ZpZF9sYXN0CmBgYAoKYGBge3J9CmNvdmlkX2xhc3QgPC0gbXV0YXRlKGNvdmlkX2xhc3QsIHBlcjEwMGsgPSB0b3RhbF9jYXNlcyAvIHBvcDIwMjAgKiAxMDAwMDApCgpjb3ZpZF9sYXN0CmBgYAoKCmBgYHtyfQp0b3AxMCA8LSBzbGljZV9tYXgoY292aWRfbGFzdCwgcGVyMTAwaywgbj0xMCkKdG9wMTAKYGBgCgoKCg==